#To-do: 1. Drop multiple per year obs via taking max CLP
#load libraries
library(data.table)
library(tidyr)
library(stringr)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#import Data
surveys <- fread("scripts&data/data/input/lake_year_data.csv")
rawdata <- fread("scripts&data/data/input/PCRIsurveysPICharter2srt_sheet1rawdata.csv")
#data explore
#see if we can re-generate ray's calc'd CLP values
# summary(rawdata)
rawdata[ , .N , .(SURVEY_START, DOW) ]
## SURVEY_START DOW N
## <char> <int> <int>
## 1: 6/25/2008 1000100 85
## 2: 6/18/2012 1010400 34
## 3: 6/8/2011 1010500 80
## 4: 5/24/2010 1014200 216
## 5: 6/21/2012 1014900 43
## ---
## 956: 6/20/2005 86023400 181
## 957: 6/25/2017 86025200 836
## 958: 6/27/2013 86025500 39
## 959: 4/22/2017 86026400 158
## 960: 6/20/2007 86047000 33
names(rawdata)[str_detect(names(rawdata), "pota")]
## [1] "potamogeton_crispus" "potamogeton_amplifolius"
## [3] "potamogeton_foliosus" "potamogeton_pusillus"
## [5] "potamogeton_robbinsii" "potamogeton_zosteriformis"
## [7] "potamogeton_friesii" "potamogeton_gramineus"
## [9] "potamogeton_illinoensis" "potamogeton_natans"
## [11] "potamogeton_nodosus" "potamogeton_praelongus"
## [13] "potamogeton_richardsonii" "potamogeton_strictifolius"
## [15] "potamogeton_sp" "potamogeton_obtusifolius"
## [17] "potamogeton_alpinus" "potamogeton_perfoliatus"
## [19] "potamogeton_epihydrus" "potamogeton_spirillus"
## [21] "potamogeton_bicupulatus" "potamogeton_diversifolius"
## [23] "potamogeton_hillii" "potamogeton_vaseyi"
## [25] "potamogeton_obtusifolius.1" "potamogeton_sp.1"
#alltime max vet depth
a <- rawdata[ , .("survey_ident"= .GRP, "totnsamp" = .N, "clpN" = sum(potamogeton_crispus >= 1, na.rm = T), "lat_median" = median(latitude, na.rm = T), "lon_median" = median(longitude, na.rm = T)) , , .(SURVEY_START, DOW) ][ , clpfoc := clpN/totnsamp ,]
#get snow data https://www.dnr.state.mn.us/climate/historical/acis_stn_data_monthly_table.html?sid=mspthr&sname=Twin%20Cities%20Area&sdate=1884-07-01&edate=por&element=snow&span=season&counts=no
snow <- fread("scripts&data/data/input/MSP_area_snow.csv")
snow
## Winter_end_year Jul Aug Sep Oc0 Nov Dec Jan Feb Mar
## <int> <int> <int> <num> <num> <num> <num> <num> <num> <num>
## 1: 2024 0 0 0 2.7 0.5 2.1 2.0 7.0 15.2
## 2: 2023 0 0 0 0.4 13.0 19.8 22.3 15.5 15.5
## 3: 2022 0 0 0 0.0 1.2 21.5 10.5 10.3 5.1
## 4: 2021 0 0 0 9.3 8.8 12.4 7.8 5.9 4.0
## 5: 2020 0 0 0 0.0 14.3 11.3 9.8 7.5 1.3
## ---
## 136: 1889 0 0 0 0.5 0.8 2.2 3.6 3.5 4.1
## 137: 1888 0 0 0 0.4 4.4 17.5 8.1 3.4 10.4
## 138: 1887 0 0 0 0.0 19.7 7.5 22.1 9.7 2.3
## 139: 1886 0 0 0 0.2 0.5 5.3 20.2 2.7 10.4
## 140: 1885 0 0 0 1.1 7.2 14.9 3.5 2.5 3.9
## Apr May Jun ANN
## <num> <num> <int> <num>
## 1: 0.0 0.0 0 29.5
## 2: 3.8 0.0 0 90.3
## 3: 1.6 0.0 0 50.2
## 4: 0.5 0.0 0 48.7
## 5: 7.3 0.0 0 51.5
## ---
## 136: 0.0 0.0 0 14.7
## 137: 2.0 1.0 0 47.2
## 138: 1.0 0.0 0 62.3
## 139: 0.1 0.0 0 39.4
## 140: 1.5 0.4 0 35.0
#convert to cm
snow[ , names(.SD) := lapply(.SD, function(x){x*2.54}), .SDcols = !c("Winter_end_year")]
#add snow to plant abund data:
a[ , date := as.IDate(SURVEY_START, format = "%m/%d/%Y") , ]
a[ , year := year(date) , ]
a[snow, on = .(year = Winter_end_year ) , annual_snowfall := ANN]
a[ DOW %in% a[ ,.N , DOW][N>2, DOW], .N , DOW]
## DOW N
## <int> <int>
## 1: 62005500 12
## 2: 62022500 10
## 3: 27071100 4
## 4: 81000300 7
## 5: 80003400 5
## 6: 2000400 8
## 7: 62000600 13
## 8: 40000200 4
## 9: 70002200 7
## 10: 34003200 3
## 11: 27006202 7
## 12: 27009501 14
## 13: 27011700 13
## 14: 27104200 9
## 15: 27104501 10
## 16: 41004300 11
## 17: 27002800 10
## 18: 27004800 8
## 19: 27004500 7
## 20: 70006900 8
## 21: 70009100 3
## 22: 27004400 5
## 23: 27019200 16
## 24: 2000900 13
## 25: 10000200 19
## 26: 27007000 10
## 27: 70005000 4
## 28: 82009999 5
## 29: 82011800 22
## 30: 27010400 5
## 31: 71014500 8
## 32: 71014700 8
## 33: 62001100 4
## 34: 10004401 4
## 35: 10004800 14
## 36: 27018600 5
## 37: 82010100 16
## 38: 82010300 14
## 39: 10001300 24
## 40: 10004402 4
## 41: 27005500 6
## 42: 18024300 8
## 43: 49013300 7
## 44: 77004600 7
## 45: 62000100 5
## 46: 73015100 6
## 47: 10004100 4
## 48: 10008800 3
## 49: 29025000 6
## 50: 13002700 6
## 51: 62007500 3
## 52: 27007100 5
## 53: 27011800 4
## 54: 3010700 3
## 55: 10004200 7
## 56: 19002200 3
## 57: 27007800 17
## 58: 10004500 3
## 59: 62005400 8
## 60: 71001300 6
## 61: 27019101 11
## 62: 62023100 11
## 63: 27019102 9
## 64: 83004300 7
## 65: 47002600 5
## 66: 47003200 5
## 67: 73003700 3
## 68: 10005300 3
## 69: 19002500 5
## 70: 82010400 11
## 71: 27003501 5
## 72: 82016300 7
## 73: 27013300 8
## 74: 10004300 4
## 75: 62004700 9
## 76: 30013500 4
## 77: 27062700 3
## 78: 61000600 3
## 79: 27003502 3
## 80: 62005600 3
## 81: 10000700 4
## 82: 66001800 3
## 83: 62004800 3
## 84: 82010600 6
## 85: 82010900 4
## 86: 10005100 3
## 87: 86013900 3
## 88: 82007400 3
## 89: 82001000 3
## 90: 82010700 3
## 91: 82000400 3
## DOW N
a[ , county := substr(str_pad(as.character(DOW),pad = "0", side = "left", width = 8), start = 1, stop = 2) , ]
all_state <- a
rm(a)
metro <- all_state[ county %in% c("02", "27", "19", "62", "82", "10" ,"70") ]
metro[ ,.N , DOW][N>2, DOW]
## [1] 62005500 62022500 27071100 2000400 62000600 70002200 27006202 27009501
## [9] 27011700 27104200 27104501 27002800 27004800 27004500 70006900 70009100
## [17] 27004400 27019200 2000900 10000200 27007000 70005000 82009999 82011800
## [25] 27010400 62001100 10004401 10004800 27018600 82010100 82010300 10001300
## [33] 10004402 27005500 62000100 10004100 10008800 62007500 27007100 27011800
## [41] 10004200 19002200 27007800 10004500 62005400 27019101 62023100 27019102
## [49] 10005300 19002500 82010400 27003501 82016300 27013300 10004300 62004700
## [57] 27062700 27003502 62005600 10000700 62004800 82010600 82010900 10005100
## [65] 82007400 82001000 82010700 82000400
metro[ DOW %in% metro[ ,.N , DOW][N>2, DOW] , hist(year) ]
## $breaks
## [1] 2000 2002 2004 2006 2008 2010 2012 2014 2016 2018 2020 2022 2024
##
## $counts
## [1] 2 2 9 25 50 59 79 72 51 73 69 28
##
## $density
## [1] 0.001926782 0.001926782 0.008670520 0.024084778 0.048169557 0.056840077
## [7] 0.076107900 0.069364162 0.049132948 0.070327553 0.066473988 0.026974952
##
## $mids
## [1] 2001 2003 2005 2007 2009 2011 2013 2015 2017 2019 2021 2023
##
## $xname
## [1] "year"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
metro[ , length(unique(year(date))) , DOW ]
## DOW V1
## <int> <int>
## 1: 62005500 11
## 2: 2009100 2
## 3: 62007100 1
## 4: 62022500 9
## 5: 27071100 3
## ---
## 154: 27012100 1
## 155: 70009800 1
## 156: 27017100 1
## 157: 27019100 1
## 158: 19003100 1
metro[ , length(unique(year(date))) , DOW ][ , summary(V1) , ]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 3.222 4.000 14.000
metro[ ,summary(year(date)) , ]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1999 2012 2015 2016 2020 2023
ggplot(metro[DOW %in% metro[year > 2015 ,.N , DOW][N>2, DOW] & year > 2015, , ] , aes( annual_snowfall, clpfoc), )+
geom_point()+
geom_smooth(method = "lm")+
facet_wrap(~DOW, scales = "free")
## `geom_smooth()` using formula = 'y ~ x'
summary(lm(clpfoc~annual_snowfall, data = metro[DOW %in% metro[,.N , DOW][N>2, DOW], , ]))
##
## Call:
## lm(formula = clpfoc ~ annual_snowfall, data = metro[DOW %in%
## metro[, .N, DOW][N > 2, DOW], , ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30789 -0.21291 -0.06681 0.15208 0.71285
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3344886 0.0324794 10.298 <2e-16 ***
## annual_snowfall -0.0002417 0.0002237 -1.081 0.28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2574 on 517 degrees of freedom
## Multiple R-squared: 0.002253, Adjusted R-squared: 0.0003234
## F-statistic: 1.168 on 1 and 517 DF, p-value: 0.2804
plotpanels <- ggplot(metro[DOW %in% metro[ ,.N , DOW][N>2, DOW], , ], aes( annual_snowfall, clpfoc, group = as.factor(DOW)) )+
geom_point(aes(color = as.factor(DOW)))+
geom_smooth(aes(color = as.factor(DOW)), method = "lm", se = F)
ggplotly(plotpanels)
## `geom_smooth()` using formula = 'y ~ x'
So, from that plot I’d say the answer is no, a lake manager can’t just use the MSP snowcover to assess effects on CLP.
To do list: - exclude non 7co metro -DONE - run before 2016 - DONE -
# make a background of "all data"
ggplot(metro , aes( annual_snowfall, clpfoc), )+
geom_point()
ggplot(metro, aes( annual_snowfall, clpfoc) )+
# geom_point(aes(color = as.factor(DOW)), alpha =.5)+
stat_smooth(geom='line', method = "lm" ,alpha=0.25, se = FALSE, aes(color = as.factor(DOW)), linewidth = 1.0)+
theme_bw()+
guides(color="none")+
geom_smooth(method = "lm", linewidth = 1.5)+
scale_y_continuous(name="Curlyleaf pondweed Abundance") +
scale_x_continuous(name="Annual Snowfall in Region (cm)")+
theme(axis.text = element_text( size=10),
axis.title.x = element_text(face="bold", size=15),
axis.title.y = element_text(face = "bold", size=15))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
summary(lm(clpfoc~annual_snowfall, data = metro)) # the unfiltered
##
## Call:
## lm(formula = clpfoc ~ annual_snowfall, data = metro)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33227 -0.20975 -0.06528 0.15300 0.72882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3659833 0.0291857 12.540 <2e-16 ***
## annual_snowfall -0.0004841 0.0002024 -2.392 0.0171 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2561 on 627 degrees of freedom
## Multiple R-squared: 0.00904, Adjusted R-squared: 0.00746
## F-statistic: 5.72 on 1 and 627 DF, p-value: 0.01707
summary(lm(clpfoc~annual_snowfall, data = metro[DOW %in% metro[year < 2016 ,.N , DOW][ ,DOW], , ]))
##
## Call:
## lm(formula = clpfoc ~ annual_snowfall, data = metro[DOW %in%
## metro[year < 2016, .N, DOW][, DOW], , ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33410 -0.20570 -0.06128 0.15491 0.67803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3694089 0.0301921 12.235 <2e-16 ***
## annual_snowfall -0.0005123 0.0002136 -2.399 0.0168 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2488 on 517 degrees of freedom
## Multiple R-squared: 0.01101, Adjusted R-squared: 0.009092
## F-statistic: 5.753 on 1 and 517 DF, p-value: 0.01681
summary(lm(clpfoc~annual_snowfall, data = metro[DOW %in% metro[year > 2015 ,.N , DOW][ ,DOW], , ]))
##
## Call:
## lm(formula = clpfoc ~ annual_snowfall, data = metro[DOW %in%
## metro[year > 2015, .N, DOW][, DOW], , ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3124 -0.2023 -0.0663 0.1476 0.7206
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3451136 0.0338422 10.198 <2e-16 ***
## annual_snowfall -0.0003353 0.0002309 -1.452 0.147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2506 on 469 degrees of freedom
## Multiple R-squared: 0.004477, Adjusted R-squared: 0.002355
## F-statistic: 2.109 on 1 and 469 DF, p-value: 0.1471
# For multiple occ ests per year
metro[ , .N , .(DOW, year(date)) ][ ,hist(N) ,]
## $breaks
## [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0
##
## $counts
## [1] 395 0 0 0 108 0 0 0 0 6
##
## $density
## [1] 3.8801572 0.0000000 0.0000000 0.0000000 1.0609037 0.0000000 0.0000000
## [8] 0.0000000 0.0000000 0.0589391
##
## $mids
## [1] 1.1 1.3 1.5 1.7 1.9 2.1 2.3 2.5 2.7 2.9
##
## $xname
## [1] "N"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
setorder(metro,-clpfoc) #order dt by increasing CLP
metro[ , ann_abund_rank := 1:.N , .(DOW, year) ] # tag ranked order of abunds within year
metro_annmax <- metro[ann_abund_rank==1, , ]
plotpanels <- ggplot(metro_annmax[DOW %in% metro_annmax[ ,.N , DOW][N>2, DOW], , ], aes( annual_snowfall, clpfoc, group = as.factor(DOW)) )+
geom_point(aes(color = as.factor(DOW)))+
geom_smooth(aes(color = as.factor(DOW)), method = "lm", se = F)
ggplotly(plotpanels)
## `geom_smooth()` using formula = 'y ~ x'
# make a background of "all data"
ggplot(metro_annmax , aes( annual_snowfall, clpfoc), )+
geom_point()
ggplot(metro_annmax, aes( annual_snowfall, clpfoc) )+
# geom_point(aes(color = as.factor(DOW)), alpha =.5)+
stat_smooth(geom='line', method = "lm" ,alpha=0.25, se = FALSE, aes(color = as.factor(DOW)), linewidth = 1.0)+
theme_bw()+
guides(color="none")+
geom_smooth(method = "lm", linewidth = 1.5)+
scale_y_continuous(name="Curlyleaf pondweed Abundance") +
scale_x_continuous(name="Annual Snowfall in Region (cm)")+
theme(axis.text = element_text( size=10),
axis.title.x = element_text(face="bold", size=15),
axis.title.y = element_text(face = "bold", size=15))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
summary(lm(clpfoc~annual_snowfall, data = metro_annmax)) # the unfiltered
##
## Call:
## lm(formula = clpfoc ~ annual_snowfall, data = metro_annmax)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36403 -0.22025 -0.04933 0.15241 0.71161
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4036579 0.0329117 12.265 < 2e-16 ***
## annual_snowfall -0.0005886 0.0002263 -2.601 0.00956 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2609 on 507 degrees of freedom
## Multiple R-squared: 0.01317, Adjusted R-squared: 0.01123
## F-statistic: 6.767 on 1 and 507 DF, p-value: 0.009555
summary(lm(clpfoc~annual_snowfall, data = metro_annmax[DOW %in% metro_annmax[year < 2016 ,.N , DOW][ ,DOW], , ]))
##
## Call:
## lm(formula = clpfoc ~ annual_snowfall, data = metro_annmax[DOW %in%
## metro_annmax[year < 2016, .N, DOW][, DOW], , ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3634 -0.2166 -0.0391 0.1532 0.6608
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4046176 0.0340364 11.888 <2e-16 ***
## annual_snowfall -0.0006169 0.0002388 -2.584 0.0101 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2525 on 413 degrees of freedom
## Multiple R-squared: 0.01591, Adjusted R-squared: 0.01352
## F-statistic: 6.675 on 1 and 413 DF, p-value: 0.01012
summary(lm(clpfoc~annual_snowfall, data = metro_annmax[DOW %in% metro_annmax[year > 2015 ,.N , DOW][ ,DOW], , ]))
##
## Call:
## lm(formula = clpfoc ~ annual_snowfall, data = metro_annmax[DOW %in%
## metro_annmax[year > 2015, .N, DOW][, DOW], , ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33576 -0.21749 -0.06175 0.15135 0.70411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3712641 0.0385391 9.633 <2e-16 ***
## annual_snowfall -0.0003849 0.0002598 -1.481 0.139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2555 on 377 degrees of freedom
## Multiple R-squared: 0.005786, Adjusted R-squared: 0.003149
## F-statistic: 2.194 on 1 and 377 DF, p-value: 0.1394
#generate a lake specific R2 and p val
lake_ests <- data.table(DOW = metro_annmax[ , .N , DOW][N>2, DOW])
for (i in metro_annmax[ , .N , DOW][N>2, DOW]) {
# i = 27002800
#snow slope
lake_iter <- lm(
clpfoc~annual_snowfall, data = metro_annmax[DOW == i]
)
lake_ests[ DOW == i, snow_slope := summary(lake_iter)$coefficients[2,1] ]
#snow_p
lake_ests[ DOW == i, snow_pval := summary(lake_iter)$coefficients[2,4] ]
# R-squared
lake_ests[ DOW == i, snow_Rsq := summary(lake_iter)$r.squared ]
#mean_clp
lake_ests[ DOW == i, mean_clp := metro_annmax[DOW == i, mean(clpfoc) , ] ]
}
ggplot(lake_ests, aes(snow_slope) )+
geom_histogram( aes(fill = snow_Rsq))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
ggplot(lake_ests, aes(snow_slope, snow_Rsq) )+
geom_point()
ggplot(lake_ests, aes(snow_slope, snow_pval) )+
geom_point( )
lake_ests[snow_Rsq>.5 ][order(snow_slope)]
## DOW snow_slope snow_pval snow_Rsq mean_clp
## <int> <num> <num> <num> <num>
## 1: 62004800 -0.0057816350 0.04244564 0.9955612 0.47719669
## 2: 62004700 -0.0036931618 0.01561386 0.5900736 0.29915419
## 3: 27009501 -0.0030702958 0.04579329 0.5125521 0.34616406
## 4: 27013300 -0.0026610718 0.07577287 0.5866440 0.18209717
## 5: 10004100 -0.0023874112 0.23076456 0.8742602 0.41721847
## 6: 27007000 -0.0016980570 0.13142744 0.5859693 0.63418643
## 7: 82010900 -0.0011778235 0.27914833 0.5196271 0.64150943
## 8: 27071100 -0.0009989825 0.14348515 0.9500555 0.10133333
## 9: 27003501 -0.0003564211 0.01362021 0.9729451 0.09359121
## 10: 27003502 -0.0003389785 0.45194584 0.5751969 0.03252449
## 11: 82010600 0.0001407120 0.02180915 0.7688957 0.01805344
## 12: 82010700 0.0002826068 0.30150868 0.7919721 0.04237288
## 13: 10000700 0.0014046888 0.10930261 0.7933418 0.23626874
## 14: 82009999 0.0016046396 0.12713377 0.9606467 0.11111111
## 15: 70009100 0.0021168454 0.09945027 0.9757944 0.52686552
## 16: 62001100 0.0032352354 0.22852207 0.8765867 0.35258547
## 17: 27062700 0.0040129242 0.41514387 0.6317185 0.80000000
metro_slopes <- metro_annmax[lake_ests, on = .(DOW)]
#export table of lakes with a snow R2 greater than .5
#
# fwrite(lake_ests[snow_Rsq>.5 ][order(snow_slope)], file = "scripts&data/data/output/snow_pred_lakes.csv")
So, from that plot I’d say the answer is no, a lake manager can’t just use the MSP snowcover to assess effects on CLP.